/*	This program creates a dataset from the raw data for the Retirement Sample */


***** Set directories
* Every project folder is derived from dir_root, so relocating the project
* only requires editing that single line.
local dir_root 		"~/Dropbox/Retirement gaming"
local dir_raw 		"`dir_root'/raw"
local dir_retirraw 	"`dir_raw'/Muestra2015"
local dir_do 		"`dir_root'/do_dataverse"
local dir_clean 	"`dir_root'/clean"
local dir_output 	"`dir_root'/output/dataverse"
local dir_temp		"`dir_clean'/temp" // folder to save temporary files that can be erased after running the do-file

* File name of the final dataset written to dir_clean
local dataname "retirementsample.dta" 


***************************************
** 	INDIVIDUAL ID, COHORT AND GENDER **			
***************************************

* Build the list of individuals in the analysis sample (men born between
* April 1941 and March 1971). Restricting early shrinks every later merge.
use "`dir_retirraw'/personas_2017.dta", clear

* Recode gender: after this line 0 is female and 1 is male
replace sexo = 2 - sexo

* One row per person: lowest gender code and latest birth date on record
collapse (min) sexo (max) fecha_nacimiento, by(id_persona)
keep if sexo==1

*** Cohorts in sample
rename fecha_nacimiento Fnac
* Discard (not impute) birth dates below -20000; rows with a missing date
* pass this filter and are removed by the cohort restriction below
keep if Fnac>=-20000
generate birth_month = mofd(Fnac)
format birth_month %tm
keep if birth_month<tm(1971m4) & birth_month>=tm(1941m4)
drop birth_month
count // 522,495 individuals
save "`dir_temp'/sample.dta", replace

* Person-level file for ALL individuals (no gender or cohort restriction):
* keeps the recoded gender and the month of birth for later merges.
use "`dir_retirraw'/personas_2017.dta", clear

* Recode gender: after this line 0 is female and 1 is male
replace sexo = 2 - sexo

* One row per person: lowest gender code and latest birth date on record
collapse (min) sexo (max) fecha_nacimiento, by(id_persona)

rename fecha_nacimiento Fnac
keep if Fnac>=-20000 // discard birth dates below -20000; missing dates are kept
generate birth_month = mofd(Fnac)
drop Fnac
save "`dir_temp'/allind.dta", replace

* Flag whether a person ever reports to industria y comercio (aportacion==1)
* and add the flag to the person-level file.
use id_persona aportacion if aportacion==1 using "`dir_retirraw'/puestos_2017.dta", clear
duplicates drop // one row per person with at least one such record
rename aportacion indcom

* Keep only persons present in allind.dta (matched or using-only rows)
merge 1:1 id_persona using "`dir_temp'/allind.dta", keep(match using) nogenerate

* Persons without any industria y comercio record get a zero
replace indcom = 0 if missing(indcom)
order indcom, last
save "`dir_temp'/allind.dta", replace


* Flag whether a person's LAST record (by month of contribution) reports to
* industria y comercio (aportacion==1), and add the flag to allind.dta.
use id_persona col12 dia aportacion using "`dir_retirraw'/puestos_2017.dta", clear 
* Split col12 into year (integer part) and month (decimal part times 100).
* NOTE(review): presumably col12 is stored as year.month (e.g. 2015.03) -- confirm.
* The month relies on floating-point arithmetic and is not rounded, so it may
* not be an exact integer before it reaches mdy(); consider round().
g anio = int(col12)
g mes= (col12-anio)*100
g mescargo=mofd(mdy(mes,dia,anio))
format mescargo %tm
keep id_persona mescargo aportacion
compress

* Order records chronologically within person so that (last) picks the most
* recent one. NOTE(review): when a person has several records in the final
* month, which one ends up last is not determined by this sort.
sort id_persona mescargo
collapse (last) aportacion, by(id_persona)
keep if aportacion==1
* The data already hold one row per person after the collapse
duplicates drop
rename aportacion indcom_last
merge 1:1 id_persona using "`dir_temp'/allind.dta"
* Discard persons that are not in allind.dta
drop if _m==1
drop _m
* Persons whose last record is not industria y comercio get a zero
replace indcom_last=0 if indcom_last==.
order indcom_last, last
save "`dir_temp'/allind.dta", replace

**********************************
** 	FIRM DATA **			
**********************************
* Firm-by-month file with one row per id_empresa and mescargo
use "`dir_retirraw'/empresas_2017.dta", clear
drop cant_coop cant_patrones cant_locales exoneracion id_contribuyente departamento

* Replace each categorical attribute by its most frequent value within the
* firm-month; maxmode selects the highest value when several modes tie
foreach attr in aportacion tipo_contribuyente ciiu {
	rename `attr' old_`attr'
	egen `attr' = mode(old_`attr'), by(id_empresa mescargo) maxmode
}

* Collapse to firm-month: modal attributes, earliest start date, total dependents
collapse (first) aportacion tipo_contribuyente ciiu (min) fecha_inicio (sum) cant_dependientes, by(id_empresa mescargo)
rename fecha_inicio fecha_inicio_empresa
save "`dir_temp'/firms.dta", replace

************************************
** RETIREMENT DATA **
************************************
* Retirement payments BPS by month and request number:
* keep, for every request number, only the records of its first payment month
use "`dir_retirraw'/pasividades_2017.dta", clear
bysort nro_solicitud: egen first_receipt = min(mescargo)
format first_receipt %tm
keep if mescargo==first_receipt
drop mescargo

* Stored in a temporary file that is merged into the retirement files next
tempfile firstpension
save `firstpension', replace

* Retirement files: one row per request, with the month of first payment added
use "`dir_retirraw'/pasivos_2017.dta", clear
merge 1:1 nro_solicitud using `firstpension', nogenerate // all match
keep if beneficio==20 // keep only contributory retirement
generate incapacidad = inlist(solicitud, 9, 27)

* Convert the three dates to months; dates below -20000 are set to missing first
foreach ev in solicitud inicio_benef fin_benef {
	replace fecha_`ev' = . if fecha_`ev' < -20000
	summarize fecha_`ev', format
	generate mes_`ev' = mofd(fecha_`ev')
	format mes_`ev' %tm
	drop fecha_`ev'
}

* If the first payment precedes the recorded benefit start, use the payment month
replace mes_inicio_benef = first_receipt if first_receipt - mes_inicio_benef < 0

* Add gender and birth month; keep matched persons only, and only men
merge m:1 id_persona using "`dir_temp'/allind.dta", keep(match) nogenerate
keep if sexo==1

* Age in completed years at request and at benefit start
foreach ev in solicitud inicio_benef {
	generate edad_`ev' = floor((mes_`ev' - birth_month)/12)
}

* Delete duplicates
* Goal of this section: reduce the retirement file towards one request per
* person through a sequence of rules. Each rule re-tags duplicates because the
* previous rule may have removed rows (tag = number of other rows sharing the
* key, so tag>0 means the row is duplicated).
* first, drop duplicates with same dates and type (keep first request nr)
duplicates tag id_persona mes_solicitud mes_inicio_benef solicitud, g(tag) // 
tab tag
bys id_persona mes_solicitud mes_inicio_benef solicitud: egen auxn=min(nro_solicitud)
drop if tag>0 & nro_solicitud>auxn
drop tag
drop auxn
* next, drop duplicates with same dates, keeping the regular retirement (solicitud==1) when one exists
duplicates tag id_persona mes_solicitud mes_inicio_benef, g(tag) // 
tab tag
g comun=solicitud==1
bys id_persona mes_solicitud mes_inicio_benef: egen maxcom=max(comun)
drop if tag>0 & comun==0 & maxcom==1
drop tag
drop maxcom comun
* second, drop when the request date is the same but the start of benefits differs (keep first)
duplicates tag id_persona mes_solicitud solicitud, g(tag) // duplicates with same id, request date and type
* NOTE(review): auxs is the minimum of mes_solicitud within groups that are
* themselves defined by mes_solicitud, so it always equals mes_solicitud and
* the test mes_solicitud==auxs below is always true.
bys id_persona mes_solicitud solicitud: egen auxs=min(mes_solicitud) // first request date
bys id_persona mes_solicitud solicitud: egen auxb=min(mes_inicio_benef) // first benefit date
drop if tag>0  & mes_solicitud==auxs & mes_inicio_benef>auxb // drop if duplicate request date but later benefit date
drop tag 
drop auxs auxb
* third, drop duplicate requests that happened before April 1996
duplicates tag id_persona, g(tag) // 
drop if tag>0 & mes_solicitud<tm(1996m4)
drop tag
* fourth, drop duplicate requests that happened before age 55
duplicates tag id_persona, g(tag) // 
drop if tag>0 & edad_solicitud<55
drop tag
* fifth, keep the first request if it happened at age 59 or later:
* drops rows that are later than the person's earliest request AND later than
* the earliest benefit start, when the youngest request age is at least 59
duplicates tag id_persona, g(tag) 
tab tag
bys id_persona: egen auxe=min(edad_solicitud)
bys id_persona: egen auxs=min(mes_solicitud)
bys id_persona: egen auxb=min(mes_inicio_benef)
drop if tag>0 & mes_solicitud>auxs & mes_inicio_benef>auxb & auxe>=59 
drop tag
drop auxe auxs auxb
* sixth, drop the first request if it happened before age 59 and there is another request at age 59 or later
duplicates tag id_persona, g(tag) 
tab tag
bys id_persona: egen auxe=min(edad_solicitud)
bys id_persona: egen auxs=min(mes_solicitud)
bys id_persona: egen auxb=min(mes_inicio_benef)
bys id_persona: egen auxe2=max(edad_solicitud)
drop if tag>0 & mes_solicitud==auxs & mes_inicio_benef==auxb & edad_solicitud<59 & auxe2>=59 
drop tag
drop auxe auxs auxb auxe2
* again, keep the first request if it happened at age 59 or later (same rule
* as the fifth step, re-applied after the sixth step removed early requests)
duplicates tag id_persona, g(tag) 
tab tag
bys id_persona: egen auxe=min(edad_solicitud)
bys id_persona: egen auxs=min(mes_solicitud)
bys id_persona: egen auxb=min(mes_inicio_benef)
drop if tag>0 & mes_solicitud>auxs & mes_inicio_benef>auxb & auxe>=59 
drop tag
drop auxe auxs auxb
* keep first request: among the rows still duplicated by person, keep the one
* with the lowest request number
duplicates tag id_persona, g(tag) 
bys id_persona: egen auxn=min(nro_solicitud)
* Compare against auxn by its full name; the bare abbreviation "aux" only
* resolves while no other variable starting with "aux" exists
drop if tag>0 & nro_solicitud>auxn
drop auxn
drop tag

* Drop variables that are not needed in the final dataset
drop solicitud beneficio
* NOTE(review): mes_fin presumably resolves by abbreviation to mes_fin_benef
* created above -- confirm no variable named exactly mes_fin exists
drop mes_fin causal_baja stip
rename id_persona i // i is the individual identifier used from here on

save "`dir_temp'/retir.dta", replace


************************************
** SOCIAL INSURANCE BENEFITS DATA **
************************************

* Benefit requests (unemployment, sick & maternity leave) by id of individual:
* keep the request number and benefit type for individuals in the sample
use "`dir_retirraw'/activos_2017.dta", clear
keep id_persona nro_solicitud beneficio
merge m:1 id_persona using "`dir_temp'/sample.dta", keep(match) nogenerate
tempfile requests
save `requests'

* Amounts benefits (unemployment, sick & maternity leave) by month and request nr:
* attach person and benefit type to each payment, keeping matched payments only
use "`dir_retirraw'/prestaciones_activos_2017.dta", clear
merge m:1 nro_solicitud using `requests', keep(match) nogenerate
drop nro_solicitud

* Harmonise names with the contribution history (i = person, t = month)
rename id_persona i
rename mescargo t
rename prestacion_bps remC1
order i t

* Total amount per person, month and benefit type
collapse (sum) remC1, by(i t beneficio) 

* Indicators by benefit type: 1 unemployment (SD), 2 sickness (enf), 3 maternity (mat)
gen SD	= beneficio==1
gen mat	= beneficio==3
gen enf	= beneficio==2
drop beneficio

* One row per person-month: total amount and number of benefit types received
sort i t
collapse (sum) remC1 SD mat enf, by(i t)
save "`dir_temp'/socialins.dta", replace
 
** Divide data in parts to append to contribution history **
* Parts are defined by ranges of the individual id i; part k is saved as
* socialins_pk.dta.

* Part 1: ids up to the first cut point
use if i<=5200000 using "`dir_temp'/socialins.dta", clear
save "`dir_temp'/socialins_p1.dta", replace

* Parts 2 to 6: ids in (previous cut point, current cut point]
local lower 5200000
local part 2
foreach upper in 5400000 5600000 5800000 6000000 6600000 {
	use if i>`lower' & i<=`upper' using "`dir_temp'/socialins.dta", clear
	save "`dir_temp'/socialins_p`part'.dta", replace
	local lower `upper'
	local ++part
}

* Part 7: ids above the last cut point
use if i>`lower' using "`dir_temp'/socialins.dta", clear
save "`dir_temp'/socialins_p`part'.dta", replace


**********************************
** CONTRIBUTION HISTORY **
**********************************

* Load the contribution history and keep individuals in the sample only.
* Gender and birth date are taken from sample.dta instead of this file.
use "`dir_retirraw'/puestos_2017.dta", clear 
drop fecha_nacimiento sexo
merge m:1 id_persona using "`dir_temp'/sample.dta", keep(match) nogenerate

* Month of contribution: year is the integer part of col12, month is its
* decimal part times 100
generate anio = int(col12)
generate mes = (col12-anio)*100
generate mescargo = mofd(mdy(mes,dia,anio))
format mescargo %tm
drop dia mes anio col12

* Start and end dates of the job spell, read from day-month-year strings
* (two-digit years are resolved with 2017 as the latest possible year)
generate fecha_ingreso = date(col13,"DMY",2017)
generate fecha_egreso = date(col14,"DMY",2017)
format fecha_ingreso fecha_egreso %d
drop col13 col14

* Reason for leaving the payroll: define the code book and attach it to the
* variable as a VALUE label (label var would only set the variable's
* descriptive label to the text "baja").
rename col15 causal_egreso
label define baja 	1 voluntario 2 despido 3 fallecimiento 4 termino_contrato 5 jubilacion 7 oficio 8 baja_cambio_titular 10 termino_mandato 11 cese_docentes ///
					12 cese_edad 13 cese_mala_conducta 15 fallecimiento_directo 22 cesante_incapacidad 23 cesante_incentivo 31 cese_subsidio 32 cese_incentivo ///
					33 cese_comision 42 RD42 50 otros 80 baja_declaracion_trabajador 98 baja_por_error 99 alta_por_error
label values causal_egreso baja

* Service bonus factor (bonificacion) implied by the code in col16.
* Codes not listed below keep a missing factor. The code lists do not
* overlap, so the order of the replace statements does not matter.
g bonificacion=.
replace bonificacion=2/1 if col16==10
replace bonificacion=3/2 if inlist(col16, 1, 6, 26, 27, 37, 47)
replace bonificacion=4/3 if inlist(col16, 2, 7, 20, 38, 39, 42, 44, 50, 52, 58)
replace bonificacion=7/5 if inlist(col16, 4, 11, 40, 43, 46)
* Code 5 only carries the 5/4 factor for months before December 1998
* (tm() is the documented monthly-date literal, as used elsewhere in this file)
replace bonificacion=5/4 if inlist(col16, 3, 9) | (col16==5 & mescargo<tm(1998m12))
replace bonificacion=6/5 if col16==41
replace bonificacion=7/6 if inlist(col16, 8, 21, 22, 23)
replace bonificacion=9/8 if inlist(col16, 24, 25, 51)
* Codes without a bonus get a factor of one
replace bonificacion=1 if inlist(col16, 99, 12, 13, 14, 15, 16) | inlist(col16, 28, 29, 30, 31, 32, 33, 34, 35)
drop col16

* Give descriptive names to the remaining raw columns col17 to col27, in order
local newnames exoneracion dias_trabajados horas_trabajadas horas_semanales seguro_salud vinculo_funcional tipo_remuneracion categoria remuneracion1 remuneracion2 remuneracion3
local k 17
foreach nm of local newnames {
	rename col`k' `nm'
	local ++k
}

* Social insurance benefits in the contributions data:
* these rows carry the benefit name in place of a firm id
g SD=(id_empresa=="DESEMPLEO")
g enf=(id_empresa=="ENFERMEDAD")
g mat=(id_empresa=="MATERNIDAD")
* Blank the firm id on benefit rows so that they do not match any firm
replace id_empresa="" if SD==1 | enf==1 | mat==1

*********************************************
** MERGE CONTRIBUTION HISTORY TO FIRM DATA **
*********************************************
* Attach firm-month characteristics. Firm-months that match no contribution
* record (firms of individuals not in sample) are discarded; _merge is kept
* because the cleaning step uses it to detect missing firm data.
merge m:1 id_empresa mescargo using "`dir_temp'/firms.dta", keep(master match)
compress
save  "`dir_temp'/sample2015_merged.dta", replace


*********************************************
** Divide data in parts to facilitate data cleaning **
* Parts are defined by ranges of id_persona; part k is saved as
* sample2015_pk.dta.

* Part 1: ids up to the first cut point
use if id_persona<=5200000 using "`dir_temp'/sample2015_merged.dta", clear
save "`dir_temp'/sample2015_p1.dta", replace

* Parts 2 to 6: ids in (previous cut point, current cut point]
local lower 5200000
local part 2
foreach upper in 5400000 5600000 5800000 6000000 6600000 {
	use if id_persona>`lower' & id_persona<=`upper' using "`dir_temp'/sample2015_merged.dta", clear
	save "`dir_temp'/sample2015_p`part'.dta", replace
	local lower `upper'
	local ++part
}

* Part 7: ids above the last cut point
use if id_persona>`lower' using "`dir_temp'/sample2015_merged.dta", clear
save "`dir_temp'/sample2015_p`part'.dta", replace




*********************************************
** DATA CLEANING  **
*********************************************

* For each of the 7 parts: drop individuals with incomplete firm data, rename
* variables to short names, build status/remuneration categories, deflate
* earnings and compute paid-leave indicators and amounts by person-month.
* Output: temp`p'.dta (still several rows per person-firm-month).
forvalues p=1/7 {
	use "`dir_temp'/sample2015_p`p'.dta", clear
	* Flag individuals with missing data on firms (we need firm data to classify self-employed and employees by firm size)
	replace _merge=3 if id_empresa=="" // do this to avoid dropping individuals with social insurance benefits 
	bys id_persona: egen minmerge=min(_merge) 
	drop if minmerge<3 // drops the whole individual if any of his records has no firm match
	drop _merge minmerge
	* Rename variables (i = individual, j = firm, t = month)
	rename id_persona i
	rename id_empresa j
	rename mescargo t
	order i j t
	rename id_obra obra
	rename aportacion aportaci
	rename tipo_contribuyente Tipocontr
	* Full variable name (as created in the firm data) instead of an abbreviation
	rename cant_dependientes ndep
	rename fecha_ingreso Fing
	rename fecha_egreso Fegr
	rename causal_egreso causal
	rename dias_trabajados diasTRA
	rename horas_trabajadas horasTRA
	rename horas_semanales horasSEM
	rename vinculo_funcional vf
	rename tipo_remuneracion tipREM
	rename remuneracion1 remC1
	rename remuneracion2 remC2
	rename remuneracion3 remC3
	destring j, replace // benefit rows have an empty firm id and become missing
	
	drop tipo_documento pais nacionalidad  seguro_salud

	
	** DATA CLEANING **
	
	* NOTE(review): ipc.do is not visible here; it is expected to add the
	* price index variable ipc used to deflate earnings below -- confirm
	run "`dir_do'/ipc.do"

	** STATUS **
	* Occupational status derived from the codes in vf
	g status		=1 if vf==1	
	replace status	=2 if vf==2 | vf==3 | vf==4 | vf==5 | vf==6 | vf==7 | vf==34 
	replace status	=3 if vf==12  
	replace status	=4 if vf==13 | vf==14 | (vf>=35 & vf<=38) 
	replace status	=5 if vf==15 | vf==16 | vf==17 | vf==49  | vf==64  
	replace status	=6 if vf==23 | vf==24 | vf==25 | vf==31 | vf==56 
	replace status	=7 if vf==20 | vf==47 | vf==53 | vf==57 | vf==61 |  (vf>=65 & vf<=68) | vf==87 
	replace status	=8 if vf==76 
	replace status	=9 if status==. & vf!=. // any other non-missing code
	replace status	=. if vf==.
	label define status 1 "Sole owner" 2 "Partner, director or administrator" 3 "Employee" 4 "Temp worker" 5 "Hourly worker" 6 "On leave, UI or disability" 7 "Public sector" 8 "Private Education" 9 "Other"
	label values status status
	g status_1=status==1
	g status_3=status==3
	label var status_1 "Sole owner"
	label var status_3 "Employee"

	* Type of remuneration (code 7 is treated as missing)
	replace   tipREM=. if tipREM==7 
	label define tipREM 1 "monthly" 2 "daily" 3 "pieceworker" 4 "commission" 5 "mixed" 6 "unpaid"  
	label values tipREM tipREM

	* Hours and days: 99 is treated as not reported; missing values are then set to zero
	replace horasSEM=. if horasSEM==99
	replace horasTRA=. if horasTRA==99
	replace diasTRA=. if  diasTRA==99
	foreach X in horasSEM horasTRA diasTRA {
		replace `X'=0 if `X'==.
	}

	*Reason for leaving payroll (indicator for code 5, retirement)
	g causal_5=causal==5
	drop causal
	label var causal_5 "Baja x jubilacion"

	** EARNINGS, UNEMPLOYMENT INSURANCE, SICKNESS AND MATERNITY LEAVE **
	** Generate earnings in thousands of pesos of Dec 2015
	foreach X in remC1 remC2 remC3 {
		replace `X'=(`X'/1000)/ipc
		replace `X'=0 if `X'==.
	}
	* W: total earnings per person-firm-month, floored at zero
	egen W=sum(remC1), by(i j t)
	sum W
	replace W=0 if W<0

	* Mark individual-months with paid leave
	bys i t: egen unemployment=max(SD)
	bys i t: egen maternity=max(mat)
	bys i t: egen sickness=max(enf)
	* Amounts of paid leave: total per person-month by benefit type, floored at zero
	cap drop aux
	g aux=0
	replace aux=remC1 if SD==1
	bys i t: egen amt_unemployment=sum(aux)
	replace amt_unemployment=0 if amt_unemployment<0
	replace aux=0
	replace aux=remC1 if mat==1
	bys i t: egen amt_maternity=sum(aux)
	replace amt_maternity=0 if amt_maternity<0
	replace aux=0
	replace aux=remC1 if enf==1
	bys i t: egen amt_sickness=sum(aux)
	replace amt_sickness=0 if amt_sickness<0
	drop aux
	sum amt_* if j!=.
	compress
	save "`dir_temp'/temp`p'.dta", replace
}


* For each of the 7 parts: reduce the data to one row per person-firm-month,
* add paid leave to the main job, compute monthly hours, reduce to one row
* per person-month (main job) and keep the analysis sample.
* Output: sample2015_p`p'.dta (overwrites the input part of the same name).
forvalues p=1/7 {		
	***********************************************
	*********** KEEP ONE LINE PER i j t ***********
	***********************************************

	use "`dir_temp'/temp`p'.dta", clear 
	**First, create summary variables we want to keep 
	foreach X of varlist remC1 remC2 remC3 horasTRA diasTRA horasSEM {
		bys i j t: egen `X'_sum = sum(`X')
	}
	foreach X of varlist remC1 horasTRA diasTRA horasSEM  {
		bys i j t: egen `X'_max = max(`X')
	}
	foreach X of varlist aportaci tipREM  {
		bys i j t: egen `X'_min = min(`X')
	}

	** drop duplicates  i j t***
	*first, drop unpaid jobs
	drop if W==0 & tipREM==6

	* among duplicates, prefer rows reporting aportaci==1 when the group has one
	duplicates tag i j t, gen(dup)
	drop if aportaci_min==1 & aportaci!=1 & dup!=0 
	drop dup
	* among remaining duplicates, keep the row(s) with the highest earnings
	duplicates tag i j t, gen(dup)
	egen aw=sum(remC1), by(i j t)
	egen maxw=max(remC1), by(i j t)
	g flag_drop_remC1=remC1<maxw & remC1!=. & maxw!=. & dup!=0
	drop if remC1<maxw-.001 & remC1!=. & maxw!=. & dup!=0
	drop aw maxw dup
	* among remaining duplicates, keep one row at random
	* NOTE(review): the draw depends on the current row order, which the
	* preceding by-group commands do not fix among ties, so the selected row
	* may differ between runs even with the seed set
	duplicates tag i j t, gen(dup)
	set seed 3000 
	generate aleatorio = (1-(-1))*runiform() + (-1)
	egen maxa=max(aleatorio), by (i j t)
	g flag_drop_aleat=aleatorio<maxa & dup!=0 
	drop if aleatorio<maxa-.0001 & dup!=0
	drop maxa dup

	** EARNINGS INCLUDING PAID LEAVE and UI (added to main job) **
	* Main job: the firm with the highest earnings in the person-month
	g aux1=remC1 if j!=.
	replace aux1=0 if j==.
	bys i t: egen aux2= rank(aux1), field
	g mainjob=aux2==1
	replace mainjob=0 if j==.
	drop aux1 aux2
	bys i t: egen anymj=max(mainjob)
	g aux=j if mainjob==1
	replace aux=0 if mainjob!=1
	bys i t: egen mainjobj=max(aux)
	drop aux
	* flag marks person-months without a main job (benefit rows only); they
	* inherit the main-job firm of the preceding row of the same person
	g flag=mainjobj==0
	sort i t j
	bys i: replace mainjobj=mainjobj[_n-1] if mainjobj==0 & _n>1
	replace j=mainjobj if flag==1  & mainjobj!=0
	* drop people who only ever have social insurance benefits, no main job
	bys i: egen evermainjob=max(mainjob)
	bys i: egen minj=min(j)
	g aux3=minj!=.
	drop if mainjobj==0 & evermainjob==0 & aux3==0
	drop aux3 minj
	drop evermainjob
	*drop individuals who start dataset on social insurance and cannot assign a firm to them
	g aux=mainjobj==0 
	bys i: egen aux2=max(aux)
	drop if aux2==1
	drop aux aux2 
	drop anymj
	replace mainjob=1 if flag==1 

	* Create Wben and drop lines that correspond to benefits
	g ben=amt_unemployment+amt_maternity+amt_sickness
	g Wben=W
	replace Wben=W+ben if mainjob==1 & flag==0
	drop if j==. 
	sum W Wben

	* Drop observations without earnings or social insurance benefits
	drop if Wben==0

	** LABEL EARNINGS AND OTHER PAYMENTS **
	label var remC1 "Earnings"
	label var remC2 "Aguinaldo"
	label var remC3 "Retroactive payments"
	label var W		"Earnings"
	label var Wben 	"Earnings plus social insurance benefits"

	** HOURS OF WORK PER MONTH **
	* W is still coded with zeros here (not missing) so that the W==0 and W>0
	* tests in the rules below behave as written; the recode of zeros to
	* missing is applied right after the hours rules.
	sum dias* horas*
	replace diasTRA_sum=30 if diasTRA_sum>=30 & diasTRA_sum!=. // cap days in month at 30
	replace diasTRA_max=30 if diasTRA_max>=30 & diasTRA_max!=. // cap days in month at 30
	replace horasSEM_max=72 if horasSEM_max>72 & horasSEM_max!=. // cap weekly hours at 12x6
	replace horasSEM_sum=72 if horasSEM_sum>72 & horasSEM_sum!=. // cap weekly hours at 12x6
	foreach var of varlist diasTRA* horasTRA* horasSEM* {
		replace `var'=0 if `var'<0
	}
	sum dias* horas*

	* MONTHLY PAY (report typical weekly hours)
	g hrsmonth			= diasTRA_sum * horasSEM_max/7 	if tipREM_min==1 & diasTRA_sum>0  & horasSEM_sum>0
	replace hrsmonth 	= diasTRA_sum * 40/7 			if tipREM_min==1 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum==0 // I assume that salaried workers without hours reported work full time
	replace hrsmonth 	= 0 							if tipREM_min==1 & diasTRA_sum==0 & horasSEM_sum==0 & (horasTRA_sum==0 | W==0)
	replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==1 & diasTRA_sum==0 & horasSEM_sum>0 & W>0
	replace hrsmonth 	= diasTRA_sum * horasTRA_sum  	if tipREM_min==1 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12

	* DAILY PAY (about 1/2 report typical weekly hours)
	replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min==2 & diasTRA_sum>0  & horasSEM_sum>0
	replace hrsmonth	= diasTRA_sum * 8 				if tipREM_min==2 & diasTRA_sum>0  & horasSEM_sum==0 & horasTRA_sum==0 // if hours not reported, assume full time
	replace hrsmonth 	= 0 							if tipREM_min==2 & diasTRA_sum==0 & horasSEM_sum==0 & (horasTRA_sum==0 | W==0) 
	replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==2 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 & W>0	// I assume they worked the full month if weekly hrs are reported and days=0
	replace hrsmonth 	= diasTRA_sum * horasTRA_sum  	if tipREM_min==2 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12
	replace hrsmonth 	= horasTRA_sum 					if tipREM_min==2 & diasTRA_sum==0 & horasSEM_sum==0 & W>0 & horasTRA_sum>12 

	* PIECEWISE PAY (about 1/2 report typical weekly hours)
	replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min==3 & diasTRA_sum>0  & horasSEM_sum>0
	replace hrsmonth	= diasTRA_sum * 40/7  			if tipREM_min==3 & diasTRA_sum>0  & horasSEM_sum==0 & horasTRA_sum==0 // if hours not reported, assume full time
	replace hrsmonth 	= 0 							if tipREM_min==3 & diasTRA_sum==0 & horasSEM_sum==0 & horasTRA_sum==0 & W==0
	replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==3 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 & W>0	// I assume they worked the full month if weekly hrs are reported and days=0
	replace hrsmonth 	= horasTRA_sum * diasTRA_sum	if tipREM_min==3 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12
	replace hrsmonth 	= horasTRA_sum 					if tipREM_min==3 & diasTRA_sum==0 & horasSEM_sum==0 & W>0 & horasTRA_sum>12 

	* COMMISSION PAY OR MIXED 
	replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0  & horasSEM_sum>0
	replace hrsmonth 	= diasTRA_sum * 40/7 			if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum==0 // I assume that salaried workers without hours reported work full time
	replace hrsmonth 	= 0 							if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum==0 & horasSEM_sum==0 & horasTRA_sum==0 & W==0
	replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 & W>0	// I assume they worked the full month if weekly hrs are reported and days=0
	replace hrsmonth 	= diasTRA_sum * horasTRA_sum 	if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>0 & horasTRA_sum<=12
	replace hrsmonth 	= diasTRA_sum * horasTRA_sum/7 	if tipREM_min>=4 & tipREM_min<=5 & diasTRA_sum>0 & horasSEM_sum==0 & W>0 & horasTRA_sum>=35 & horasTRA_sum<=60 

	* UNPAID  
	replace hrsmonth	= diasTRA_sum * horasSEM_max/7 	if tipREM_min==6 & diasTRA_sum>0  & horasSEM_sum>0
	replace hrsmonth 	= diasTRA_sum * 40/7 			if tipREM_min==6 & diasTRA_sum>0 & horasSEM_sum==0 & horasTRA_sum==0 // I assume that unpaid workers without hours reported work full time
	replace hrsmonth 	= 0 							if tipREM_min==6 & diasTRA_sum==0 & horasSEM_sum==0 & horasTRA_sum==0 
	replace hrsmonth 	= 30 * horasSEM_max/7 			if tipREM_min==6 & diasTRA_sum==0 & horasSEM_sum>0 & horasTRA_sum==0 	// I assume they worked the full month if weekly hrs are reported and days=0
	replace hrsmonth 	= diasTRA_sum * horasTRA_sum 	if tipREM_min==6 & diasTRA_sum>0 & horasSEM_sum==0 & horasTRA_sum>0 & horasTRA_sum<=12

	* SET HOURS TO ZERO IF MISSING AND EARNINGS==0 (and not unpaid worker)
	replace hrsmonth	= 0								if hrsmonth==. & W==0 & tipREM_min!=6 

	* Zeros to missing for earnings.
	* This recode comes AFTER the hours rules: a missing value compares as
	* larger than any number, so recoding earlier would make every W>0 test
	* true and every W==0 test false. It stays before the second main-job
	* step, which adds benefits to W.
	replace W=. if W==0 
	replace Wben=. if Wben==0
	
	drop horasTRA* horasSEM* diasTRA* remC1_max remC1_sum
	drop tipREM
	rename tipREM_min tipREM

	***********************************************
	*********** KEEP ONE LINE PER i t ***********
	***********************************************
	
	* Main job, after changes in sample
	rename  mainjob mainjob_old
	g aux1=remC1
	bys i t: egen aux2= rank(aux1), field
	g mainjob=aux2==1
	drop aux1 aux2
	tab mainjob mainjob_old
	tab flag if mainjob!=mainjob_old
	* benefits move to the row that is newly classified as the main job
	replace Wben=W+ben if mainjob_old==0 & mainjob==1 & ben>0
	drop mainjob_old mainjobj flag
	* Keep only main job and drop duplicates
	duplicates tag i t, g(tag)
	tab tag mainjob
	drop if mainjob==0 & tag>0
	drop tag
	duplicates tag i t, g(tag)
	drop if tag>0 & remC1==0
	drop tag
	duplicates tag i t, g(tag)
	bys i: egen maxtag=max(tag)
	drop if maxtag>0 // drop individuals for whom main job cannot be determined
	drop tag maxtag 
	
	***********************************************
	*********** KEEP SAMPLE ONLY ***********
	***********************************************
	*** AGE & MONTH OF OBSERVATION CENTERED 
	*birthdate
	format Fnac %td
	g birth_month=mofd(Fnac)
	format birth_month %tm
	* age in months and years
	g agemonths = t-birth_month
	g age = agemonths/12
	g agedisc = floor(age) // age in integer years
	* age in months centered at the 50th birthday
	g refbday_month=birth_month+(12*50)
	format refbday_month %tm
	g agemonths_centered = t-refbday_month // this is number of months after ref age birthday

	* mark observations in sample age (45 to 57): 60 months before to 96 months after the 50th birthday
	g marksage = agemonths_centered>=-60 & agemonths_centered<96
	bys i: egen insage=max(marksage) 
	
	* count observations in industria & comercio, in sample age and working as salaried employee or self-employed
	cap drop aux
	g aux=aportaci==1 & (status_1==1 | status_3==1) & tipREM==1
	bys i t: egen indcom=max(aux)
	bys i: egen nrmonths_indcom=sum(indcom) // counts total nr months reported in industria&comercio for each individual
	g indcomage=indcom*marksage
	bys i: egen nrmonths_indcomage=sum(indcomage)  // counts total nr months reported in industria&comercio for each individual in the sample age (45 to 57)
	
	g sample_indcomage=nrmonths_indcomage>=6 // sample are those observed at least 6 months reporting to industria & comercio at ages 45 to 57
	
	keep if sample_indcomage==1

	drop aux marksage indcom nrmonths_indcom nrmonths_indcomage sample_indcomage

	cap drop aux*
	drop insage indcomage remC1 remC2 remC3 amt_* 
	drop SD enf mat
	drop aportaci_min
	
	compress
	save "`dir_temp'/sample2015_p`p'.dta", replace

}


*********************************************
** MERGE RETIREMENT DATA AND APPEND PARTS **
*********************************************	

* Attach the retirement request (one row per individual in retir.dta) to each
* cleaned part and overwrite the part on disk.
forvalues p=1/7 {
	* clear makes the load independent of whatever is left in memory
	use "`dir_temp'/sample2015_p`p'.dta", clear
	merge m:1 i using "`dir_temp'/retir.dta"
	drop if _m==2 // retirement records of individuals not in this part
	bys i: g markeri=_n==1 // mark 1 obs per individual
	rename _m merge_retir // 1 = no retirement record, 3 = matched
	tab merge_retir markeri
	save "`dir_temp'/sample2015_p`p'.dta", replace
}

* Stack the 7 cleaned parts into a single dataset
clear
foreach p of numlist 1/7 {
	append using "`dir_temp'/sample2015_p`p'.dta"
}

* English names for the retirement variables
rename mes_solicitud		month_request
rename edad_solicitud 		age_request
rename mes_inicio_benef 	month_benefits
rename edad_inicio_benef 	age_benefits

save "`dir_clean'/`dataname'", replace // this is the final dataset


* Leave Stata with nothing in memory and stop the do-file here
clear all
exit





